*Replication file 01c_Onset_limited_sample_US
*Article: Counterfactual Coercion: Could harsher sanctions against Russia have prevented the worst?
*Authors: Thies Niemeier, Gerald Schneider


***************************************************************
***US***
***************************************************************

* Fix Stata's random-number seed before any data work
set seed 1234

* Prepare data: load the dataset and keep only rows whose sender is the US
use "Dataset.dta", clear
keep if sender == "US"

* Independent variables
* log(1 + x) transform of the oil/gas value
generate ln_oil_gas_value_2014 = ln(1 + oil_gas_value_2014)
* sender-specific covariates copied under generic names
generate sender_colony = US_colony
generate sender_trade  = ln_US_Trade_COW
* coup_dummy is coup1 with 1 mapped to 0 and 2 mapped to 1; every other value
* (including missing) is carried over unchanged.
* NOTE(review): presumably coup1 is coded 1 = no coup, 2 = coup - confirm against the codebook
generate coup_dummy = coup1
replace coup_dummy = coup_dummy - 1 if inlist(coup_dummy, 1, 2)

* Dependent variable : 1 if a threat or sanction case started in the dyad
* Declare the panel structure (unit ccodecow, time year) so the lag operator can be used
xtset ccodecow year
* Rows with a system-missing caseid are treated as "no case" (0)
replace caseid=0 if caseid==.
* Year-on-year change in caseid; missing where the previous year's row does not exist
gen sanctiononset = (caseid-l.caseid)
* Any increase larger than 1 is collapsed to 1 (an increase of exactly 1 is already 1)
replace sanctiononset=1 if sanctiononset > 1 & !missing(sanctiononset)
* Any decrease in caseid is coded as no onset
* NOTE(review): a switch to a new case with a lower caseid would also land here - confirm this is intended
replace sanctiononset=0 if sanctiononset < 0
* Non-onset rows flagged by sanction_dyad or threat_dyad are set to missing;
* rows with a missing outcome are dropped later by the keep-if filter
replace sanctiononset=. if sanctiononset == 0 & (sanction_dyad == 1 | threat_dyad == 1)
tab sanctiononset
* Copy of the outcome that is non-missing only for years after 2009 (used in the evaluation commands)
* NOTE(review): the ordering flag u created further down uses year >= 2009, not year > 2009 - confirm which cut-off is intended
gen sanction_test = sanctiononset if year > 2009

* Lag time-series variables
* For each source variable, create l_<name> holding the value from the same
* country's previous row, but only when that row is exactly one year earlier
* (otherwise the lag is left missing).
sort ccodecow year
local lag_sources v2x_polyarchy gd_ptss coup_dummy one_sided_violence conflict ///
    mid_terr_integrity ln_GDPpc_imputed sender_trade ln_oil_gas_value_2014 ///
    defense_alliance
foreach src of local lag_sources {
    * the lagged name is l_ plus the source name; the oil/gas variable drops its _2014 suffix
    local lagname = "l_" + subinstr("`src'", "_2014", "", 1)
    by ccodecow: generate `lagname' = `src'[_n-1] if year == year[_n-1] + 1
}

* Create dummy variables: one pol_terr# indicator per observed value of l_gd_ptss
tabulate l_gd_ptss, generate(pol_terr)

** Filter for cases of importance
* keep rows flagged as potentially sanctioned countries that have a non-missing outcome
keep if !missing(sanctiononset) & pot_sanctioned_countries == 1

* Sort by period, in preparation for the RF model
* u is 0 for years before 2009 and 1 from 2009 onwards, so the earlier rows come first
generate u = (year >= 2009)
sort u

** Imposition
* Random Forest Model
* Classification forest for sanctiononset with 1500 iterations, fitted on the
* first 2019 observations in the current sort order.
* NOTE(review): the data are sorted by u at this point, so 1/2019 is presumably
* meant to cover exactly the u == 0 rows - confirm with: count if u == 0
* NOTE(review): numvars(15) presumably equals the full number of predictors
* (assuming pol_terr* expands to five dummies) - confirm
local rf_predictors l_v2x_polyarchy pol_terr* l_coup_dummy l_one_sided_violence ///
    l_conflict l_mid_terr_integrity l_ln_GDPpc_imputed l_sender_trade ///
    l_ln_oil_gas_value sender_colony l_defense_alliance
rforest sanctiononset `rf_predictors' in 1/2019, type(class) iter(1500) numvars(15)

* Variable Importance
* List everything the last estimation command left in e(), then the importance matrix
ereturn list
matrix list e(importance)
* Write variable importance to an Excel file (sheet "M", with row and column names).
* The path uses forward slashes: Stata accepts them on Windows, macOS and Linux,
* whereas backslash-separated paths only resolve on Windows.
putexcel set "Supplemental_Material/Variable_Importance/Variable_Importance_Onset_US_RF.xlsx", sheet("M") replace
putexcel A1=matrix(e(importance)), names

* Predictions
* randonsUS holds the predicted class; randonsUS0 and randonsUS1 hold the
* predicted class probabilities requested with the pr option
predict randonsUS
predict randonsUS0 randonsUS1, pr

* All output paths below use forward slashes: Stata accepts them on Windows,
* macOS and Linux, whereas backslash-separated paths only resolve on Windows.

* Confusion Matrix
* sanction_test is non-missing only for years after 2009, so the comparison is
* restricted to those rows
* Sensitivity 20, Specificity 95.76
diagtest sanction_test randonsUS
tab2xl sanction_test randonsUS using "Supplemental_Material/Prediction_Output/Confusion_Matrixes/US_Onset_RF", row(1) col(1)

* Kappa .08
kap sanction_test randonsUS

* AUPR .08
* evaluated on the predicted probability of class 1
prtab sanction_test randonsUS1
* NOTE(review): assumes a graph named Graph exists at this point - confirm that a preceding command draws it
graph save "Graph" "Supplemental_Material/Prediction_Output/Roc-curves/AUPR_Onset_RF_US.gph", replace

